library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(glue)
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:glue':
##
## trim
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(lemur)
##
## Attaching package: 'lemur'
##
## The following object is masked from 'package:dplyr':
##
## vars
##
## The following object is masked from 'package:ggplot2':
##
## vars
source("util.R")
fit_small <- qs::qread("../benchmark/output/glioblastoma/fit_hvg10k-small.qs")
fit_small <- fit_small[,fit_small$colData$condition != "etoposide" & fit_small$colData$patient_id != "PW029"]
# Flip x axis for prettier figures
reducedDim(fit_small, "fit_al_umap")[,1] <- -reducedDim(fit_small, "fit_al_umap")[,1]
reducedDim(fit_small, "fit_umap")[,1] <- -reducedDim(fit_small, "fit_umap")[,1]
reducedDim(fit_small, "UMAP")[,2] <- -reducedDim(fit_small, "UMAP")[,2]
nei_pan_small <- qs::qread("../benchmark/output/glioblastoma/fit_hvg10k-nei_pan-small.qs")
nei_pan_mod <- qs::qread("../benchmark/output/glioblastoma/fit_hvg10-nei_pan-without_neighborhood.qs")
nei_pan_tumor_small <- qs::qread("../benchmark/output/glioblastoma/fit_hvg10k-nei_pan_tumor-small.qs")
psce_lmo2 <- qs::qread("../benchmark/output/glioblastoma/fit_hvg10k-lmo2_psce.qs")
full_row_data <- qs::qread("../benchmark/output/glioblastoma/fit_hvg10-rowdata.qs")
# cell_type_colors <- structure(ggsci::pal_npg()(4), names = c("Tumor", "T cell", "Oligodendrocytes", "Myeloid"))
cell_type_colors <- structure(c("#FAC1BD", "#8dd3c7", "#80b1d3", "#b3de69"), names = c("Tumor cells", "T cells", "Oligodendrocytes", "Myeloid cells"))
ggplot(enframe(cell_type_colors), aes(x = name, y = 1)) +
geom_tile(aes(fill = value)) +
scale_fill_identity()
# RColorBrewer::display.brewer.all()
# condition_colors<- structure(RColorBrewer::brewer.pal(n = 3, name = "Set2"), names = c("ctrl", "panobinostat", "etoposide"))
condition_colors <- structure(c("#fc8d62", "#8da0cb"), names = c("ctrl", "panobinostat"))
ggplot(enframe(condition_colors), aes(x = name, y = 1)) +
geom_tile(aes(fill = value)) +
scale_fill_identity()
sel_genes <- c("CD14", "TYROBP", "PLP1", "MAG", "TRBC1", "TRBC2", "GAP43")
ct_marker_genes <- tibble(gene = sel_genes, cell_type_marker = rep(c("Myeloid cells", "Oligodendrocytes", "T cells", "Tumor cells"), times = c(2,2,2,1)))
stopifnot(all(sel_genes %in% rowData(fit_small)$gene))
gene_ids <- deframe(as_tibble(rowData(fit_small)[c("gene", "gid")]))[sel_genes]
expr_mat <- counts(fit_small)[gene_ids, ]
cell_type_dot_plot <- as_tibble(as.matrix(t(expr_mat))) %>%
mutate(cell_type = fit_small$colData$cell_type) %>%
pivot_longer(-cell_type, names_to = "gene_id", values_to = "expr") %>%
left_join(enframe(gene_ids, name = "gene_name",value = "gene_id"), by = "gene_id") %>%
summarize(mean_expr = mean(expr, trim = 0),
frac_expr = mean(expr != 0),
.by = c(cell_type, gene_id, gene_name)) %>%
mutate(gene_name = fct_rev(factor(gene_name, levels = sel_genes))) %>%
mutate(cell_type_short = case_when(
cell_type == "Myeloid cells" ~ "Myel.",
cell_type == "Oligodendrocytes" ~ "Oligo.",
cell_type == "Tumor cells" ~ "Tumor",
TRUE ~ cell_type
)) %>%
left_join(ct_marker_genes, by = c("gene_name" = "gene")) %>%
ggplot() +
geom_point(aes(x = cell_type_short, y = gene_name, color = log2(mean_expr + 1), size = frac_expr), stroke = 0) +
geom_rect(data = ct_marker_genes, aes(xmin = 0.6, xmax = 0.75, ymin = -Inf, ymax=Inf, fill = cell_type_marker), show.legend=FALSE) +
facet_grid(rows = vars(cell_type_marker), scales = "free_y") +
colorspace::scale_color_continuous_sequential(limits = c(0, NA), breaks = c(0, 2, 4)) +
scale_fill_manual(values = cell_type_colors) +
scale_size_binned(range = c(0.01, 3), breaks = c(0, 0.25, 0.75, 1), limits = c(0, 1), labels = c(0, "", "", 1)) +
scale_x_discrete(expand = expansion(add = 0.3)) +
scale_y_discrete(expand = expansion(add = 0.4), labels = \(x) paste0("\\emph{", x, "}")) +
coord_cartesian(clip = "off") +
guides(color = guide_colorbar(barheight = unit(1, "mm"), barwidth = unit(5, "mm"), title.vjust = 0.25, label.position = "top", label.vjust = -2),
size = guide_bins(label.vjust = -11)) +
theme(axis.title = element_blank(), legend.position = c(0,-0.2), legend.direction = "horizontal",
legend.box = "horizontal", legend.text = element_text(size = font_size_tiny),
strip.background = element_blank(), strip.text = element_blank(), panel.spacing.y = unit(1.5, "mm")) +
labs(size = "$\\text{frac}[y \\neq 0]$", color = "expression")
## Warning: Ignoring unknown argument to `guide_bins()`: `label.vjust`.
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
cell_type_dot_plot
chr_ratio_pl <- colData(fit_small) %>%
as_tibble() %>%
dplyr::select(cell_type, chr_10_ratio, chr_7_ratio) %>%
pivot_longer(starts_with("chr_"), names_to = "origin", values_to = "ratio") %>%
mutate(is_tumor = cell_type == "Tumor cells") %>%
ggplot(aes(x = ratio)) +
geom_histogram(aes(fill = is_tumor), position = "identity", alpha = 0.7, show.legend = FALSE, bins = 500) +
facet_wrap(vars(origin), nrow = 1, labeller = as_labeller(c(chr_10_ratio = "Chr.~10 Ratio", chr_7_ratio = "Chr.~7 Ratio")), scales = "free_y") +
scale_y_continuous(expand = expansion(0)) +
scale_x_continuous(expand = expansion(0), breaks = c(0, 1, 2)) +
scale_fill_manual(values = c("TRUE" = unname(cell_type_colors["Tumor cells"]), "FALSE" = "#888888")) +
coord_cartesian(xlim = c(0, 2.1)) +
labs(x = "Expression ratio vs chr. 1-5", y = "No. cells") +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
chr_ratio_pl
# new_sel_genes <- c("HIST3H2A", "LMO2", "EPC1", "PTPRZ1", "HBEGF", "SPATA13", "MBP")
# gene2id <- deframe(full_row_data[,c("gene", "gid")])
# ctrl_proj <- lemur:::grassmann_map(lemur:::sum_tangent_vectors(fit_small$coefficients, c(1, 0, 0, rep(0, times = 5))), fit_small$base_point)[,1:2]
# pan_proj <- lemur:::grassmann_map(lemur:::sum_tangent_vectors(fit_small$coefficients, c(1, 0, 1, rep(0, times = 5))), fit_small$base_point)[,1:2]
#
# rownames(ctrl_proj) <- full_row_data$gid
# rownames(pan_proj) <- full_row_data$gid
#
# sqrt(rowSums2(ctrl_proj[gene2id[new_sel_genes],]^2))
# sqrt(rowSums2(pan_proj[gene2id[new_sel_genes],]^2))
sel_genes <- c("CD14", "PLP1", "GAP43", "HIST3H2A", "EPC1")
is_marker_gene <- c("CD14" = TRUE, "PLP1" = TRUE, "GAP43" = TRUE, "HIST3H2A" = FALSE)
top_dirs <- deframe(full_row_data[,c("gene", "gid")])[sel_genes]
ctrl_proj <- lemur:::grassmann_map(lemur:::sum_tangent_vectors(fit_small$coefficients, c(1, 0, 0, rep(0, times = 5))), fit_small$base_point)[,1:2]
rownames(ctrl_proj) <- full_row_data$gid
ctrl_proj_df <- as_tibble(ctrl_proj[top_dirs,], rownames = "gid") %>%
left_join(as_tibble(full_row_data))
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Joining with `by = join_by(gid)`
pan_proj <- lemur:::grassmann_map(lemur:::sum_tangent_vectors(fit_small$coefficients, c(1, 0, 1, rep(0, times = 5))), fit_small$base_point)[,1:2]
rownames(pan_proj) <- full_row_data$gid
pan_proj_df <- as_tibble(pan_proj[top_dirs,], rownames = "gid") %>%
left_join(as_tibble(full_row_data))
## Joining with `by = join_by(gid)`
proj_df <- bind_rows(mutate(ctrl_proj_df, condition = "ctrl"),
mutate(pan_proj_df, condition = "panobinostat"))
manual_text_pos <- tribble(
~gene, ~V1, ~V2, ~hjust, ~vjust,
"CD14", 15, -4, 0, 0,
"PLP1", 1, 18, 0.5, 0,
"GAP43", -15, -9, 1, 0,
"HIST3H2A", 4, -18, 0, 0,
) %>%
mutate(gene_label = paste0("\\emph{", gene, "}"))
# eto_proj <- lemur:::grassmann_map(lemur:::sum_tangent_vectors(fit_small$coefficients, c(1, 1, 0, rep(0, times = 5))), fit_small$base_point)[,1:2]
# rownames(eto_proj) <- full_row_data$gid
# eto_proj_df <- as_tibble(eto_proj[top_dirs,] * c(rep(200, 3), 2000), rownames = "gid") %>%
# left_join(as_tibble(full_row_data))
z_base_space <- as_tibble(colData(fit_small)) %>%
mutate(emb = t(fit_small$embedding[1:2,])) %>%
ggplot(aes(x = emb[,1], y = emb[,2])) +
ggrastr::rasterize(geom_point(aes(color = cell_type), size = 0.07, stroke = 0, show.legend = FALSE), dpi = 600) +
scale_color_manual(values = cell_type_colors) +
small_axis(label = "$\\matr{Z}$ space", xlim = c(-18, 40), ylim = c(-20, 25)) +
labs(subtitle = "LEMUR's latent space ($\\matr{Z}$)\ncolored by cell type")
mc_biplot_marker <- as_tibble(colData(fit_small)) %>%
mutate(emb = t(fit_small$embedding[1:2,])) %>%
ggplot(aes(x = emb[,1], y = emb[,2])) +
ggrastr::rasterize(geom_point(aes(color = cell_type), size = 0.07, stroke = 0, show.legend = FALSE), dpi = 600) +
scale_color_manual(values = cell_type_colors) +
ggnewscale::new_scale_colour() +
geom_segment(data = proj_df %>% filter(is_marker_gene[gene]), aes(x = 0, y = 0, xend = V1 * 200, yend = V2 * 200, color = condition), linewidth = 1.2, show.legend = FALSE) +
geom_point(data = proj_df %>% filter(is_marker_gene[gene]) %>% arrange(desc(condition)), aes(x = V1 * 200, y = V2 * 200, color = condition), size = 0.6, show.legend = FALSE) +
scale_color_manual(values = condition_colors) +
geom_text(data = manual_text_pos %>% filter(is_marker_gene[gene]), aes(x = V1, y = V2, hjust = hjust, vjust = vjust, label = gene_label), size = font_size_small / .pt) +
# coord_fixed(ylim = c(-20, 25), xlim = c(-40, 20), clip = "off") +
small_axis(label = "$\\matr{Z}$ space", xlim = c(-18, 40), ylim = c(-20, 25)) +
labs(subtitle = "Marker genes projected on $\\matr{Z}$")
mc_biplot_diff <- as_tibble(colData(fit_small)) %>%
mutate(emb = t(fit_small$embedding[1:2,])) %>%
ggplot(aes(x = emb[,1], y = emb[,2])) +
ggrastr::rasterize(geom_point(aes(color = cell_type), size = 0.07, stroke = 0, show.legend = FALSE), dpi = 600) +
scale_color_manual(values = cell_type_colors) +
ggnewscale::new_scale_colour() +
geom_segment(data = proj_df %>% filter(!is_marker_gene[gene]) %>% arrange(desc(condition)), aes(x = 0, y = 0, xend = V1 * 2000, yend = V2 * 2000, color = condition),
linewidth = 1.2, show.legend = FALSE) +
geom_point(data = proj_df %>% filter(!is_marker_gene[gene]) %>% arrange(desc(condition)), aes(x = V1 * 2000, y = V2 * 2000,color = condition), size = 0.6, show.legend = FALSE) +
scale_color_manual(values = condition_colors) +
geom_text(data = manual_text_pos %>% filter(!is_marker_gene[gene]), aes(x = V1, y = V2, hjust = hjust, vjust = vjust, label = gene_label), size = font_size_small / .pt) +
# coord_fixed(ylim = c(-20, 25), xlim = c(-40, 20), clip = "off") +
small_axis(label = "$\\matr{Z}$ space", xlim = c(-18, 40), ylim = c(-25, 20)) +
labs(subtitle = "Gene with large expression change projected on $\\matr{Z}$")
mc_biplot_marker
mc_biplot_diff
cell_type_legend_suppl <- ggplot(enframe(cell_type_colors), aes(x = 0, y = 0, color = name)) +
geom_point() +
scale_color_manual(values = cell_type_colors, name = "Cell types") +
theme(legend.position = "bottom", legend.title = element_text(size = font_size_small))
annotation_pl <- cowplot::ggdraw() +
cowplot::draw_label(x = 0.8, y = 0.1, label = "Three other cell types", size = font_size_small, hjust = 0) +
cowplot::draw_line(x = c(0.9, 0.71), y = c(0.15, 0.5), size = 0.25) +
cowplot::draw_label(x = 1, y = 0.6, label = "Tumor cells", size = font_size_small, hjust = 0, vjust = 0.5) +
cowplot::draw_line(x = c(1, 0.82), y = c(0.6, 0.55), size = 0.2)
plot_assemble(add_text("(A) Glioblastoma cell type assignment", x = 1.5, y = 1.5, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(cowplot::get_legend(cell_type_legend_suppl), x = 3, y = 5, height = 5),
add_plot(cell_type_dot_plot , x = 0, y = 10, width = 70, height = 40),
add_text("(B) Aneuploidie", x = 95, y = 1.5, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(chr_ratio_pl, x = 95, y = 7, width = 60, height = 28),
add_plot(annotation_pl, x = 95, y = 7, width = 60, height = 32),
add_text("(C) Multi-condition biplots", x = 1.5, y = 62, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(z_base_space, x = 0, y = 65, width = 55, height = 40),
add_plot(mc_biplot_marker, x = 55, y = 65, width = 55, height = 40),
add_plot(mc_biplot_diff, x = 110, y = 65, width = 55, height = 40),
width = 170, height = 110, units = "mm", show_grid_lines = FALSE, latex_support = TRUE,
filename = "../plots/suppl_glioblastoma_celltype_annotation.pdf")
## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.
## Using TikZ metrics dictionary at:
## glioblastoma_analysis-tikzDictionary
## gg[gg1]
## gg[gg2]
## gg[gg3]
## gg[gg4]
## gg[gg5]
## gg[gg6]
## gg[gg7]
## gg[gg8]
## gg[gg9]
## gg[gg10]
## [1] TRUE TRUE TRUE TRUE
Make UMAPs
make_dim_pl <- function(red_dim, color_by = NULL, axis_label = ""){
as_tibble(colData(fit_small)) %>%
mutate(umap = reducedDim(fit_small, red_dim)) %>%
sample_frac(n = 1) %>%
ggplot(aes(x = umap[,1], y = umap[,2])) +
ggrastr::rasterize(geom_point(aes(color = {{color_by}}), size = 0.02, stroke = 0, show.legend = FALSE), dpi = 600) +
small_axis(label = axis_label, arrow_length = if(axis_label != "") 10 else 5)
}
umap_orig_ct_pl <- make_dim_pl("UMAP", color_by = cell_type, axis_label = "") +
scale_color_manual(values = cell_type_colors)
umap_orig_pc_pl <- make_dim_pl("UMAP", color_by = condition, axis_label = "UMAP") +
ggrepel::geom_text_repel(data = . %>% filter(cell_type == "Tumor cells") %>% summarize(umap = matrix(colMedians(umap), nrow = 1), .by = patient_id),
aes(label = patient_id), size = font_size_tiny / .pt) +
scale_color_manual(values = condition_colors)
umap_fit_ct_pl <- make_dim_pl("fit_umap", color_by = cell_type, axis_label = "") +
scale_color_manual(values = cell_type_colors)
umap_fit_pc_pl <- make_dim_pl("fit_umap", color_by = condition, axis_label = "") +
scale_color_manual(values = condition_colors)
umap_fital_ct_pl <- make_dim_pl("fit_al_umap", color_by = cell_type, axis_label = "") +
scale_color_manual(values = cell_type_colors)
umap_fital_pc_pl <-make_dim_pl("fit_al_umap", color_by = condition, axis_label = "") +
scale_color_manual(values = condition_colors)
umap_orig_ct_pl
umap_orig_pc_pl
scale_sym <- function(x, ignore_extreme = 0){
abs_max <- max(abs(quantile(x, c(ignore_extreme, 1-ignore_extreme))))
x / abs_max
}
umap_fit <- reducedDim(fit_small, "fit_al_umap")
pan_genes_order <- c("HIST3H2A", "EPC1", "PTPRZ1", "HBEGF", "SPATA13", "MBP")
is_inside <- tibble(gid = rowData(fit_small)$gid, gene = rowData(fit_small)$gene, cell_id = list(colnames(fit_small))) %>%
filter(gene %in% pan_genes_order) %>%
left_join(dplyr::select(nei_pan_small, name, neighborhood), by = c("gid"= "name")) %>%
mutate(inside = map2(cell_id, neighborhood, \(ref, nei_cells)ref %in% nei_cells)) %>%
dplyr::select(-neighborhood) %>%
unnest(c(inside, cell_id)) %>%
mutate(gene = factor(gene, levels = c(pan_genes_order[1], "LMO2", pan_genes_order[-1])))
is_inside <- bind_rows(is_inside, tibble(gid = rowData(fit_small)$gid, gene = rowData(fit_small)$gene, cell_id = list(colnames(fit_small))) %>%
filter(gene %in% "LMO2") %>%
left_join(dplyr::select(nei_pan_tumor_small, name, neighborhood), by = c("gid"= "name")) %>%
mutate(inside = map2(cell_id, neighborhood, \(ref, nei_cells)ref %in% nei_cells)) %>%
dplyr::select(-neighborhood) %>%
unnest(c(inside, cell_id)) %>%
mutate(gene = factor(gene, levels = c(pan_genes_order[1], "LMO2", pan_genes_order[-1]))))
de_regions_maps <- nei_pan_small %>%
unnest(neighborhood) %>%
left_join(tibble(umap = umap_fit, neighborhood = rownames(umap_fit)), by = "neighborhood") %>%
left_join(as_tibble(full_row_data), by = c("name" = "gid")) %>%
filter(gene %in% pan_genes_order) %>%
mutate(gene = factor(gene, levels = pan_genes_order))
de_regions_maps2 <- nei_pan_tumor_small %>%
unnest(neighborhood) %>%
left_join(tibble(umap = umap_fit, neighborhood = rownames(umap_fit)), by = "neighborhood") %>%
left_join(as_tibble(full_row_data), by = c("name" = "gid"))
de_plot_data <- as_tibble(colData(fit_small), rownames = "cell_id") %>%
mutate(umap = umap_fit) %>%
mutate(de = as_tibble(t(assay(fit_small, "DE_panobinostat")))) %>%
unnest(de, names_sep = "-") %>%
pivot_longer(starts_with("de-"), names_sep = "-", values_to = "de", names_to = c(NA, "gid")) %>%
inner_join(as_tibble(full_row_data), by = "gid") %>%
filter(gene %in% c(pan_genes_order, "LMO2")) %>%
mutate(gene = factor(gene, levels = c(pan_genes_order[1], "LMO2", pan_genes_order[-1]))) %>%
left_join(is_inside, by = c("gid", "gene", "cell_id")) %>%
mutate(inside = ifelse(inside, "in", "out")) %>%
mutate(inside = factor(inside, levels = c("in", "out"))) %>%
sample_frac(size = 1)
de_plots <- de_plot_data %>%
filter(gene != "LMO2" | cell_type == "Tumor cells") %>%
group_by(gene) %>%
group_map(\(data, key){
abs_max <- max(abs(quantile(data$de, c(0.95, 0.05))))
data$gene <- key[[1]][1]
ggplot(data, aes(x = umap[,1], y = umap[,2])) +
ggrastr::rasterise(geom_point(aes(color = de), size = 0.05, stroke = 0), dpi = 600) +
# scale_colour_gradient2_rev(limits = c(-1, 1) * abs_max, oob = scales::squish, breaks = c(-1, 0, 1) * signif_to_zero(abs_max, 1), mid = "lightgrey") +
scale_color_de_gradient(abs_max, mid_width = 0.2) +
facet_grid(vars(inside), vars(gene), labeller = labeller(inside =as_labeller( c("in" = "Cells in Neighb.", "out" = "All other cells")),
gene = as_labeller(\(x) paste0("\\emph{", x, "}"))), switch = "y") +
scale_x_continuous(limits = range(umap_fit[,1]) * 1.1) +
scale_y_continuous(limits = range(umap_fit[,2]) * 1.1) +
small_axis(label = "", fontsize = font_size_tiny, xlim = range(umap_fit[,1]), ylim = range(umap_fit[,2]), arrow_length = 3) +
guides(color = guide_colorbar(barheight = unit(1, "mm"), title = "")) +
theme(panel.spacing.x = unit(5, "mm"), legend.position = "bottom")
}) %>%
cowplot::plot_grid(plotlist = ., nrow = 1)
de_plots
de_plot_data_hist3h2a <- de_plot_data %>%
filter(gene == "HIST3H2A")
abs_max <- max(abs(quantile(de_plot_data_hist3h2a$de, c(0.95, 0.05))))
de_plot_full <- ggplot(de_plot_data_hist3h2a, aes(x = umap[,1], y = umap[,2])) +
ggrastr::rasterise(geom_point(aes(color = de), size = 0.05, stroke = 0), dpi = 600) +
scale_color_de_gradient(abs_max, mid_width = 0.2) +
scale_x_continuous(limits = range(umap_fit[,1]) * 1.1) +
scale_y_continuous(limits = range(umap_fit[,2]) * 1.1) +
small_axis(label = "", fontsize = font_size_tiny, xlim = range(umap_fit[,1]), ylim = range(umap_fit[,2]), arrow_length = 3) +
theme(legend.position = "bottom")
de_plot_full
de_plot_split <- ggplot(de_plot_data_hist3h2a, aes(x = umap[,1], y = umap[,2])) +
ggrastr::rasterise(geom_point(aes(color = de), size = 0.05, stroke = 0), dpi = 600) +
scale_color_de_gradient(abs_max, mid_width = 0.2) +
facet_wrap(vars(inside), ncol = 1, labeller = as_labeller(c("in" = "Cells in Neighborhood", "out" = "All other cells"))) +
scale_x_continuous(limits = range(umap_fit[,1]) * 1.1) +
scale_y_continuous(limits = range(umap_fit[,2]) * 1.1) +
small_axis(label = "", fontsize = font_size_tiny, xlim = range(umap_fit[,1]), ylim = range(umap_fit[,2]), arrow_length = 3) +
theme(legend.position = "bottom")
de_plot_split
expr_plot_data <- as_tibble(colData(fit_small), rownames = "cell_id") %>%
mutate(umap = umap_fit) %>%
mutate(expr = as_tibble(as.matrix(t(assay(fit_small, "logcounts"))))) %>%
unnest(expr, names_sep = "-") %>%
pivot_longer(starts_with("expr-"), names_sep = "-", values_to = "expr", names_to = c(NA, "gid")) %>%
inner_join(as_tibble(full_row_data), by = "gid") %>%
sample_frac(size = 1) %>%
filter(gid %in% nei_pan_small$name | gid %in% nei_pan_tumor_small$name)
expr_plots <- expr_plot_data %>%
filter(condition != "etoposide") %>%
mutate(gene = factor(gene, levels = c(pan_genes_order[1], "LMO2", pan_genes_order[-1]))) %>%
filter(! is.na(gene)) %>%
group_by(gene) %>%
group_map(\(data, key){
data$gene <- key[[1]][1]
ggplot(data, aes(x = umap[,1], y = umap[,2])) +
ggrastr::rasterise(geom_point(aes(color = expr), size = 0.05, stroke = 0), dpi = 300) +
geom_density2d(data = filter(rbind(de_regions_maps %>% mutate(gene = as.character(gene)), de_regions_maps2), gene == key[[1]]), breaks = 0.2, linewidth = 0.5,
contour_var = "ndensity", color = "black",
h = apply(umap_fit, 2, MASS::bandwidth.nrd) * 0.9,
show.legend = FALSE) +
colorspace::scale_color_continuous_sequential(limits = c(0, quantile(data$expr, c(0.98))), oob = scales::oob_squish, palette = "Purples 2", n.breaks = 3) +
facet_grid(vars(condition), vars(gene), labeller = labeller(gene = as_labeller(\(x) paste0("\\emph{", x, "}"))), switch = "y") +
scale_x_continuous(limits = range(umap_fit[,1]) * 1.1) +
scale_y_continuous(limits = range(umap_fit[,2]) * 1.1) +
small_axis(label = "", fontsize = font_size_tiny, xlim = range(umap_fit[,1]), ylim = range(umap_fit[,2]), arrow_length = 3) +
guides(color = guide_colorbar(barheight = unit(1, "mm"), title = "")) +
theme(panel.spacing.x = unit(5, "mm"), legend.position = "bottom") +
labs(color = "")
}) %>%
cowplot::plot_grid(plotlist = ., nrow = 1)
## Warning: Removed 7076 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 6336 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 50998 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 37598 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 5170 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 2448 rows containing non-finite outside the scale range
## (`stat_density2d()`).
## Warning: Removed 3206 rows containing non-finite outside the scale range
## (`stat_density2d()`).
expr_plots
pan_genes_ids <- deframe(full_row_data[, c("gene", "gid")])[c(pan_genes_order, "LMO2")]
mask <- matrix(NA, nrow = length(pan_genes_ids), ncol = ncol(fit_small),
dimnames = list(pan_genes_ids, colnames(fit_small)))
mask2 <- matrix(1, nrow = length(pan_genes_ids), ncol = ncol(fit_small),
dimnames = list(pan_genes_ids, colnames(fit_small)))
for(id in pan_genes_ids){
if(id == "ENSG00000135363"){
inside_cells <- intersect(colnames(fit_small), filter(nei_pan_tumor_small, name == id)$neighborhood[[1]])
}else{
inside_cells <- intersect(colnames(fit_small), filter(nei_pan_small, name == id)$neighborhood[[1]])
}
mask[id, inside_cells] <- 1
mask2[id, inside_cells] <- NA
}
psce <- glmGamPoi::pseudobulk(SingleCellExperiment(list(inside = as.matrix(logcounts(fit_small)[pan_genes_ids,] * mask),
outside = as.matrix(logcounts(fit_small)[pan_genes_ids,] * mask2))),
group_by = vars(patient_id, condition, pat_cond), n = n(),
aggregation_functions = list(.default = \(...) matrixStats::rowMeans2(..., na.rm = TRUE)),
col_data = as.data.frame(colData(fit_small)))
## Aggregating assay 'inside' using 'custom function'.
## Aggregating assay 'outside' using 'custom function'.
comparison_plots <- as_tibble(colData(psce)) %>%
mutate(expr_in = as_tibble(t(assay(psce, "inside")))) %>%
mutate(expr_out = as_tibble(t(assay(psce, "outside")))) %>%
unpack(starts_with("expr"), names_sep = "-") %>%
pivot_longer(starts_with("expr"), names_sep = "[-_]", names_to = c(".value", "inside", "gid")) %>%
left_join(as_tibble(full_row_data)) %>%
filter(condition != "etoposide") %>%
mutate(condition_short = case_when(
condition == "ctrl" ~ "ctrl",
condition == "panobinostat" ~ "pano.",
)) %>%
mutate(inside = factor(inside, levels = c("in", "out"))) %>%
mutate(gene = factor(gene, levels = c(pan_genes_order[1], "LMO2", pan_genes_order[-1]))) %>%
group_by(gene) %>%
group_map(\(data, key){
data$gene <- key[[1]][1]
ggplot(data, aes(x = condition_short, y = expr)) +
geom_point(aes(group = condition, color = condition), position = position_dodge(width = 0.7), size = 0.6, show.legend = FALSE) +
geom_line(aes(group = patient_id, x = condition_short, y = expr), color = "lightgrey", linewidth = 0.3) +
scale_color_manual(values = condition_colors) +
scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.1)), n.breaks = 3) +
coord_cartesian(clip = "off") +
ggh4x::facet_nested(~ gene + inside, labeller = labeller(inside =as_labeller( c("in" = "Inside", "out" = "Outside")),
gene = as_labeller(\(x) paste0("\\emph{", x, "}"))),
strip = ggh4x::strip_nested(clip = "off")) +
guides(x = guide_axis(angle = 90)) +
theme(axis.title.x = element_blank(), axis.text = element_text(size = font_size_tiny),
strip.placement = "outside", panel.spacing.x = unit(2, "mm"))
}) %>%
cowplot::plot_grid(plotlist = ., nrow = 1)
## Joining with `by = join_by(gid)`
comparison_plots
comparison_pl <- as_tibble(colData(psce)) %>%
mutate(expr_inside = as_tibble(t(assay(psce, "inside")))) %>%
unpack(starts_with("expr"), names_sep = "-") %>%
pivot_longer(starts_with("expr"), names_sep = "[-_]", names_to = c(".value", "origin", "gid")) %>%
left_join(as_tibble(full_row_data)) %>%
filter(condition != "etoposide") %>%
mutate(condition_short = case_when(
condition == "ctrl" ~ "ctrl",
condition == "panobinostat" ~ "panob.",
)) %>%
mutate(gene = factor(gene, levels = pan_genes_order)) %>%
filter(gene == "HIST3H2A") %>%
ggplot(aes(x = condition_short, y = expr)) +
geom_point(aes(group = condition, color = condition), position = position_dodge(width = 0.7), size = 0.6, show.legend = FALSE) +
geom_line(aes(group = patient_id, x = condition_short, y = expr), color = "lightgrey", linewidth = 0.3) +
scale_color_manual(values = condition_colors) +
scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.2)), n.breaks = 3) +
coord_cartesian(clip = "off") +
# guides(x = guide_axis(angle = 90)) +
labs(subtitle = "Pseudobulked expr. of\ncells in neighborhood") +
theme(axis.title = element_blank(), plot.subtitle = element_text(size = font_size_small))
## Joining with `by = join_by(gid)`
comparison_pl
plot_assemble(add_text("(A) Expression pattern of selected genes under panobinostat treatment", x = 1.5, y = 1.5, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(expr_plots, x = 0, y = 5, width = 170, height = 50),
add_text("(B) Predicted differential expression pattern ($\\Delta$)", x = 1.5, y = 57, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(de_plots, x = 0, y = 60, width = 170, height = 50),
add_text("(C) Pseudobulked differential expression", x = 1.5, y = 117, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(comparison_plots, x = 0, y = 120, width = 170, height = 30),
width = 170, height = 155, units = "mm", show_grid_lines = FALSE, latex_support = TRUE,
filename = "../plots/suppl_gene_expr_patterns.pdf")
## gg[gg1]
## gg[gg2]
## gg[gg3]
## gg[gg4]
## gg[gg5]
## gg[gg6]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
is_inside2 <- tibble(gid = rowData(fit_small)$gid, gene = rowData(fit_small)$gene, cell_id = list(colnames(fit_small))) %>%
filter(gid %in% "ENSG00000135363") %>%
left_join(dplyr::select(nei_pan_tumor_small, name, neighborhood), by = c("gid"= "name")) %>%
mutate(inside = map2(cell_id, neighborhood, \(ref, nei_cells)ref %in% nei_cells)) %>%
dplyr::select(-neighborhood) %>%
unnest(c(inside, cell_id))
de_plot_data2 <- as_tibble(colData(fit_small), rownames = "cell_id") %>%
mutate(umap = umap_fit) %>%
mutate(de = as_tibble(t(assay(fit_small, "DE_panobinostat")))) %>%
unnest(de, names_sep = "-") %>%
pivot_longer(starts_with("de-"), names_sep = "-", values_to = "de", names_to = c(NA, "gid")) %>%
inner_join(as_tibble(full_row_data), by = "gid") %>%
inner_join(is_inside2, by = c("gid", "cell_id", "gene")) %>%
filter(gid == "ENSG00000135363") %>%
sample_frac(size = 1)
abs_max <- max(abs(quantile(de_plot_data2$de, c(0.95, 0.05))))
de_plots2 <- de_plot_data2 %>%
mutate(inside = ifelse(inside, "in", "out")) %>%
mutate(inside = factor(inside, levels = c("in", "out"))) %>%
mutate(de = ifelse(cell_type == "Tumor cells", de, NA)) %>%
ggplot(aes(x = umap[,1], y = umap[,2])) +
ggrastr::rasterise(geom_point(aes(color = de), size = 0.05, stroke = 0), dpi = 600) +
# scale_colour_gradient2_rev(limits = c(-1, 1) * abs_max, oob = scales::squish, breaks = c(-1, 0, 1) * signif_to_zero(abs_max, 1), mid = "lightgrey", na.value = "#00000000") +
scale_color_de_gradient(abs_max, mid_width = 0.2, na.value = "#00000000") +
facet_wrap(vars(inside), nrow = 1, labeller = as_labeller(c("in" = "Cells in Neighborhood", "out" = "Other tumor cells"))) +
scale_x_continuous(limits = range(umap_fit[,1]) * 1.1) +
scale_y_continuous(limits = range(umap_fit[,2]) * 1.1) +
small_axis(label = "", fontsize = font_size_tiny, xlim = range(umap_fit[,1]), ylim = range(umap_fit[,2]), arrow_length = 3) +
theme(panel.spacing.x = unit(-2, "mm"), legend.position = "bottom")
de_plots2
genes_of_interest <- c("ENSG00000135363")
mask <- matrix(NA, nrow = length(genes_of_interest), ncol = ncol(fit_small),
dimnames = list(genes_of_interest, colnames(fit_small)))
mask2 <- matrix(1, nrow = length(genes_of_interest), ncol = ncol(fit_small),
dimnames = list(genes_of_interest, colnames(fit_small)))
for(id in genes_of_interest){
inside_cells <- intersect(colnames(fit_small), filter(nei_pan_tumor_small, name == id)$neighborhood[[1]])
mask[id, inside_cells] <- 1
mask2[id, inside_cells] <- NA
}
psce2 <- glmGamPoi::pseudobulk(SingleCellExperiment(list(inside = as.matrix(logcounts(fit_small)[genes_of_interest,] * mask),
outside = as.matrix(logcounts(fit_small)[genes_of_interest,] * mask2))),
group_by = vars(patient_id, condition, pat_cond), n = n(),
aggregation_functions = list(.default = \(...) matrixStats::rowMeans2(..., na.rm = TRUE)),
col_data = as.data.frame(colData(fit_small)))
## Aggregating assay 'inside' using 'custom function'.
## Aggregating assay 'outside' using 'custom function'.
comparison_data2 <- as_tibble(colData(psce2)) %>%
mutate(expr_inside = as_tibble(t(assay(psce2, "inside"))),
expr_outside = as_tibble(t(assay(psce2, "outside")))) %>%
unpack(starts_with("expr"), names_sep = "-") %>%
pivot_longer(starts_with("expr"), names_sep = "[-_]", names_to = c(".value", "origin", "gid")) %>%
left_join(as_tibble(full_row_data)) %>%
filter(condition != "etoposide") %>%
mutate(condition_short = case_when(
condition == "ctrl" ~ "ctrl",
condition == "panobinostat" ~ "panob.",
))
## Joining with `by = join_by(gid)`
diff_df <- comparison_data2 %>%
summarize(expr = mean(expr), .by = c(condition, origin)) %>%
pivot_wider(names_from = condition, values_from = expr)
comparison_pl2 <- comparison_data2 %>%
filter(patient_id != "PW029") %>%
ggplot(aes(x = condition_short, y = expr)) +
# geom_boxplot(outlier.shape = NA, width = 0.3) +
geom_line(aes(group = patient_id, x = condition_short, y = expr), color = "lightgrey", linewidth = 0.3) +
geom_point(aes(group = condition, color = condition), position = position_dodge(width = 0.7), size = 0.8, show.legend = FALSE) +
geom_segment(data = diff_df, aes(x = 1.5, y = ctrl, xend = 1.5, yend = panobinostat), color = "red", linewidth = 0.8, arrow = arrow(type = "closed", length = unit(1, "mm"))) +
facet_grid(cols = vars(origin), labeller = as_labeller(c("inside" = "Cells in\nNeighborhood", "outside" = "Other tumor\ncells"))) +
scale_color_manual(values = condition_colors) +
scale_y_continuous(limits = c(0, NA), expand = expansion(mult = c(0, 0.05)), n.breaks = 3) +
coord_cartesian(clip = "off") +
# guides(x = guide_axis(angle = 90)) +
theme(panel.spacing.x = unit(1, "mm"), axis.title.x = element_blank()) +
labs(y = "pseudobulked expr.")
diff_pl <- diff_df %>%
ggplot(aes(x = origin, xend = origin, y = 0, yend = panobinostat - ctrl)) +
geom_hline(yintercept = 0) +
geom_segment(color = "red", linewidth = 0.8, arrow = arrow(type = "closed", length = unit(1, "mm"))) +
ggsignif::geom_signif(comparison = list(c("inside", "outside")), annotations = "**", textsize = font_size_small / .pt, y_position = 0.01) +
scale_y_continuous(limits = c(-0.1, 0.1), expand = expansion(mult = c(0.05, 0.05)), n.breaks = 3) +
labs(subtitle = "$ \\text{Diff. in diff.}$\n($\\Delta_{\\text{in}} - \\Delta_{\\text{out}}$)",
y = "$\\Delta$") +
theme(axis.title.x = element_blank(), plot.subtitle = element_text(hjust = 0.5))
comparison_pl2
diff_pl
psce_lmo2_ctrl <- psce_lmo2[,psce_lmo2$condition == "ctrl"]
# glm_fit2 <- lemur:::edger_fit(counts(psce_lmo2_ctrl), design = ~ patient_id + inside, col_data = colData(psce_lmo2_ctrl),
# offset = log(glmGamPoi:::estimate_size_factors(counts(psce_lmo2_ctrl), method = "ratio")))
# glm_de_res <- lemur:::edger_test_de(glm_fit2, contrast = c(0, 0, 0, 0, 0, 0, 1))
glm_fit <- glmGamPoi::glm_gp(psce_lmo2[,psce_lmo2$condition == "ctrl"], design = ~ patient_id + inside, size_factors = "ratio", verbose = TRUE)
## Calculate Size Factors (ratio)
## Make initial dispersion estimate
## Make initial beta estimate
## Estimate beta
## Estimate dispersion
## Fit dispersion trend
## Shrink dispersion estimates
## Estimate beta again
glm_de_res <- glmGamPoi::test_de(glm_fit, cond(inside = TRUE) - cond(inside = FALSE))
as_tibble(glm_de_res) %>%
arrange(pval) %>%
left_join(as_tibble(full_row_data), by = c("name" = "gid")) %>%
print(n = 20)
## # A tibble: 10,000 × 13
## name pval adj_pval f_statistic df1 df2 lfc gene gene_id
## <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> <chr>
## 1 ENSG00000279576 1.98e-9 1.98e-5 329. 1 10.8 1.19 AP00… ENSG00…
## 2 ENSG00000114942 1.52e-7 5.77e-4 142. 1 10.8 -0.761 EEF1… ENSG00…
## 3 ENSG00000140988 2.61e-7 5.77e-4 127. 1 10.8 -0.763 RPS2 ENSG00…
## 4 ENSG00000239672 2.87e-7 5.77e-4 125. 1 10.8 -0.646 NME1 ENSG00…
## 5 ENSG00000089157 3.38e-7 5.77e-4 121. 1 10.8 -0.793 RPLP0 ENSG00…
## 6 ENSG00000005022 4.89e-7 5.77e-4 112. 1 10.8 -0.665 SLC2… ENSG00…
## 7 ENSG00000198242 5.61e-7 5.77e-4 109. 1 10.8 -0.700 RPL2… ENSG00…
## 8 ENSG00000112306 6.34e-7 5.77e-4 106. 1 10.8 -0.765 RPS12 ENSG00…
## 9 ENSG00000244313 6.51e-7 5.77e-4 106. 1 10.8 -0.762 RP11… ENSG00…
## 10 ENSG00000149273 6.60e-7 5.77e-4 106. 1 10.8 -0.633 RPS3 ENSG00…
## 11 ENSG00000251562 6.90e-7 5.77e-4 105. 1 10.8 0.962 MALA… ENSG00…
## 12 ENSG00000148303 8.39e-7 5.77e-4 101. 1 10.8 -0.703 RPL7A ENSG00…
## 13 ENSG00000204628 8.82e-7 5.77e-4 99.6 1 10.8 -0.621 GNB2… ENSG00…
## 14 ENSG00000225217 9.27e-7 5.77e-4 98.6 1 10.8 3.41 HSPA7 ENSG00…
## 15 ENSG00000171863 9.32e-7 5.77e-4 98.5 1 10.8 -0.710 RPS7 ENSG00…
## 16 ENSG00000145912 1.12e-6 5.77e-4 94.8 1 10.8 -0.639 NHP2 ENSG00…
## 17 ENSG00000129235 1.13e-6 5.77e-4 94.7 1 10.8 -0.550 TXND… ENSG00…
## 18 ENSG00000226221 1.18e-6 5.77e-4 93.9 1 10.8 -0.796 RPL2… ENSG00…
## 19 ENSG00000119705 1.21e-6 5.77e-4 93.4 1 10.8 -0.582 SLIRP ENSG00…
## 20 ENSG00000108561 1.21e-6 5.77e-4 93.4 1 10.8 -0.629 C1QBP ENSG00…
## # ℹ 9,980 more rows
## # ℹ 4 more variables: chromosome <fct>, gene_length <int>, strand. <fct>,
## # source <fct>
ego1 <- clusterProfiler::enrichGO(gene = glm_de_res %>%
# filter(lfc > +0.1) %>%
slice_min(pval, n = 200) %>%
pull(name),
universe = glm_de_res$name,
OrgDb = org.Hs.eg.db::org.Hs.eg.db,
keyType = "ENSEMBL",
readable = TRUE,
ont = "ALL")
##
##
# ego2 <- clusterProfiler::enrichGO(gene = glm_de_res %>%
# filter(lfc < -0.1) %>%
# slice_min(pval, n = 200) %>%
# pull(name),
# universe = glm_de_res$name,
# OrgDb = org.Hs.eg.db::org.Hs.eg.db,
# keyType = "ENSEMBL",
# readable = TRUE,
# ont = "ALL")
ekegg1 <- clusterProfiler::enrichKEGG(gene = glm_de_res %>%
slice_min(pval, n = 200) %>%
pull(name) %>%
clusterProfiler::bitr(fromType = "ENSEMBL", toType = "ENTREZID", OrgDb = org.Hs.eg.db::org.Hs.eg.db, drop=TRUE) %>%
pull(ENTREZID),
universe = clusterProfiler::bitr(glm_de_res$name, fromType = "ENSEMBL", toType = "ENTREZID", OrgDb = org.Hs.eg.db::org.Hs.eg.db, drop=TRUE)$ENTREZID,
organism = "hsa",
keyType = "kegg")
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(., fromType = "ENSEMBL", toType = "ENTREZID",
## : 3% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(glm_de_res$name, fromType = "ENSEMBL", : 2.58%
## of input gene IDs are fail to map...
as_tibble(ego1) %>% print(n = 10)
## # A tibble: 124 × 10
## ONTOLOGY ID Description GeneRatio BgRatio pvalue p.adjust qvalue
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 BP GO:0002181 cytoplasmic… 70/163 147/88… 5.71e-87 1.17e-83 1.10e-83
## 2 BP GO:0042254 ribosome bi… 35/163 270/88… 1.72e-20 1.76e-17 1.66e-17
## 3 BP GO:0022613 ribonucleop… 39/163 404/88… 3.42e-18 2.33e-15 2.20e-15
## 4 BP GO:0042273 ribosomal l… 17/163 65/8838 1.24e-15 5.58e-13 5.25e-13
## 5 BP GO:0042255 ribosome as… 16/163 55/8838 1.36e-15 5.58e-13 5.25e-13
## 6 BP GO:0042274 ribosomal s… 16/163 76/8838 3.56e-13 1.21e-10 1.14e-10
## 7 BP GO:0006364 rRNA proces… 21/163 197/88… 6.49e-11 1.90e- 8 1.78e- 8
## 8 BP GO:0000027 ribosomal l… 9/163 23/8838 1.30e-10 3.32e- 8 3.12e- 8
## 9 BP GO:0016072 rRNA metabo… 22/163 229/88… 1.70e-10 3.87e- 8 3.65e- 8
## 10 BP GO:0022618 ribonucleop… 18/163 182/88… 5.54e- 9 1.13e- 6 1.07e- 6
## # ℹ 114 more rows
## # ℹ 2 more variables: geneID <chr>, Count <int>
# as_tibble(ego2) %>% print(n = 10)
as_tibble(ekegg1) %>% print(n = 10)
## # A tibble: 2 × 9
## ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 hsa03010 Ribosome 71/126 128/45… 5.24e-85 6.60e-83 6.57e-83 6187/… 71
## 2 hsa05171 Coronaviru… 66/126 162/45… 1.58e-66 9.96e-65 9.90e-65 6187/… 66
as_tibble(ego1) %>%
filter(ID == "GO:0002181")
## # A tibble: 1 × 10
## ONTOLOGY ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 BP GO:0… cytoplasmi… 70/163 147/88… 5.71e-87 1.17e-83 1.10e-83 RPS2/…
## # ℹ 1 more variable: Count <int>
# as_tibble(ego2) %>%
# filter(ID == "GO:0002181")
neg_log10_trans <- scales::trans_new("neg_log10", trans = \(x) -log10(x), inv = \(x) 10^(-x), domain = c(1e-100, Inf))
volcano_enrichment_pl <- as_tibble(glm_de_res) %>%
# mutate(up_genes = name %in% ego1@geneSets[["GO:0006325"]]) %>%
mutate(down_genes = name %in% ego1@geneSets[["GO:0002181"]]) %>%
mutate(category = case_when(
# up_genes ~ "Chromatin Reorganization",
down_genes ~ "Cytopl. Transl. (GO:0002181)",
TRUE ~ "other"
)) %>%
arrange(desc(category)) %>%
mutate(lfc = ifelse(lfc < -2.7, -Inf, lfc)) %>%
ggplot(aes(x = lfc, y = pval)) +
ggrastr::rasterize(geom_point(data = . %>% filter(category == "other"), color = "lightgrey", stroke = 0, size = 0.2), dpi = 300) +
ggrastr::rasterize(geom_point(data = . %>% filter(category != "other"), aes(color = category), stroke = 0, size = 0.6), dpi = 300) +
small_arrow(label = "up inside", position = c(0.7, 0.98), offset = unit(0.95, "npc"), label_offset = unit(-1.3, "mm"), fontsize = font_size_tiny) +
small_arrow(label = "down inside",position = c(0.3, 0.02), offset = unit(0.95, "npc"), label_offset = unit(-1.3, "mm"), fontsize = font_size_tiny) +
geom_hline(yintercept = max(glm_de_res$pval[glm_de_res$adj_pval < 0.1])) +
scale_color_manual(values = c("Cytopl. Transl. (GO:0002181)" = "#8E063B")) +
scale_y_continuous(limits = c(1, NA), expand = expansion(mult = c(0, 0.05)), trans = neg_log10_trans,
breaks = 10^seq(0, -11, by = -2), labels = \(x) glue("$10^{{{log10(x)}}}$")) +
scale_x_continuous(limits = c(-3, 3)) +
guides(color = guide_legend(override.aes = list(size = 2))) +
labs(x = "Log fold change", y = "p-value", color = "") +
theme(legend.position = "bottom")
volcano_enrichment_pl
## Warning: Removed 3 rows containing missing values or values outside the scale
## range (`geom_point()`).
dim(fit_small)
## [1] 17 65955
table(nei_pan_mod$adj_pval < 0.1)
##
## FALSE TRUE
## 7502 2498
as_tibble(nei_pan_tumor_small) %>%
left_join(as_tibble(rowData(fit_small)) %>% dplyr::select(name = gid, gene_name = gene))
## Joining with `by = join_by(name)`
## # A tibble: 4 × 14
## name neighborhood n_cells sel_statistic pval adj_pval f_statistic df1
## <chr> <I<list>> <int> <dbl> <dbl> <dbl> <dbl> <int>
## 1 ENSG000… <chr> 12014 -37.3 1.11e-4 0.00403 25.2 1
## 2 ENSG000… <chr> 12598 -47.7 3.33e-6 0.000680 46.5 1
## 3 ENSG000… <chr> 12598 -47.6 5.32e-6 0.000858 43.1 1
## 4 ENSG000… <chr> 7397 -31.2 2.33e-2 0.0972 6.24 1
## # ℹ 6 more variables: df2 <dbl>, lfc <dbl>, did_pval <dbl>, did_adj_pval <dbl>,
## # did_lfc <dbl>, gene_name <chr>
table(fit_small$colData$cell_type)
##
## Myeloid cells Oligodendrocytes T cells Tumor cells
## 9704 9926 360 45965
sum(filter(is_inside2, gene == "LMO2")$inside)
## [1] 9430
sum(fit_small$colData$cell_type == "Tumor cells") - sum(filter(is_inside2, gene == "LMO2")$inside)
## [1] 36535
cell_type_legend <- my_get_legend(
ggplot(enframe(cell_type_colors), aes(x = 0, y = 0, color = name)) +
geom_point() +
scale_color_manual(values = cell_type_colors, name = "Cell types") +
theme(legend.position = "left", legend.title = element_text(size = font_size)))
condition_legend <- my_get_legend(
ggplot(enframe(condition_colors) %>% filter(name != "etoposide"), aes(x = 0, y = 0, color = name)) +
geom_point() +
scale_color_manual(values = condition_colors, name = "Conditions") +
theme(legend.position = "left", legend.title = element_text(size = font_size)))
# hist3h2a_legend <- cowplot::get_legend({
# de_plot_full +
# guides(color = guide_colorbar(title = "", label.theme = element_text(size = font_size_tiny, margin = margin(t = 0 , unit = "mm")),
# barheight = unit(1.5, "mm"), barwidth = unit(23, "mm")))
# })
lmo2_legend <- my_get_legend({
de_plots2 +
guides(color = guide_colorbar(title = "", label.theme = element_text(size = font_size_tiny, margin = margin(t = 0.5, unit = "mm")),
barheight = unit(1.5, "mm"), barwidth = unit(21, "mm")))
})
volcano_legend <- my_get_legend({
volcano_enrichment_pl +
guides(color = guide_legend(override.aes = list(size = 1))) +
labs(color = "")
})
## Warning: Removed 3 rows containing missing values or values outside the scale
## range (`geom_point()`).
row1_umap_width <- 48
plot_assemble(
add_text("(A) Glioblastoma experimental design", x = 1.5, y = 1.5, vjust = 1, fontsize = font_size, fontface = "bold"),
add_graphic("../illustrations/glioblastoma_experiment_overview_wideArtboard 1.pdf", x = 1.5, y = 2.5, units = "mm"),
add_text("(B) Integration with LEMUR adjusting for patient and treatment effect", x = 1.5, y = 24.5, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(condition_legend, x = 3, y = 31 , height = 34),
add_text("UMAP of $\\matr{Y}$", x = 25 + row1_umap_width * 0.5, y = 30, vjust = 1, hjust = 0.5, fontsize = 7),
add_text("UMAP of $\\matr{Z}$ if $\\matr{S} = I$", x = 25 + row1_umap_width * 1.5, y = 30, vjust = 1, hjust = 0.5, fontsize = 7),
add_text("UMAP of $\\matr{Z}$ if $\\matr{S}$ estimated with Harmony", x = 25 + row1_umap_width * 2.5, y = 30, vjust = 1, hjust = 0.5, fontsize = 7),
add_plot(umap_orig_pc_pl, x = 25 + row1_umap_width * 0, y = 31, width = row1_umap_width, height = 40),
add_plot(umap_fit_pc_pl, x = 25 + row1_umap_width * 1, y = 31, width = row1_umap_width, height = 40),
add_plot(umap_fital_pc_pl, x = 25 + row1_umap_width * 2, y = 31, width = row1_umap_width, height = 40),
add_plot(cell_type_legend, x = 3, y = 70, height = 40),
add_plot(umap_orig_ct_pl, x = 25 + row1_umap_width * 0, y = 70, width = row1_umap_width, height = 40),
add_plot(umap_fit_ct_pl, x = 25 + row1_umap_width * 1, y = 70, width = row1_umap_width, height = 40),
add_plot(umap_fital_ct_pl, x = 25 + row1_umap_width * 2, y = 70, width = row1_umap_width, height = 40),
add_text("(C) Panobinostat down-regulates \\emph{LMO2} in a subpopulation of tumor cells", x = 1.5, y = 111, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(de_plots2 + guides(color = "none"), x = 0, y = 113, width = 61, height = 40),
add_plot(lmo2_legend, x = 16.5, y = 151, width = 30, height = 5),
add_text("down", x = 16, y = 150.9, vjust = 1, fontsize = font_size_small, hjust = 1),
add_text("regulation", x = 16, y = 152.9, vjust = 1, fontsize = font_size_small, hjust = 1),
add_text("up", x = 40.5, y = 150.9, vjust = 1, fontsize = font_size_small, hjust = 0),
add_text("regulation", x = 40.5, y = 152.9, vjust = 1, fontsize = font_size_small, hjust = 0),
add_plot(comparison_pl2, x = 61, y = 114, width = 45, height = 41.8),
add_plot(diff_pl, x = 104, y = 115, width = 25, height = 40.5),
add_text("(D) Characterization of tumor\n\\quad\\quad subpopulation", x = 130, y = 111, vjust = 1, fontsize = font_size, fontface = "bold"),
add_plot(volcano_legend, x = 133, y = 117, width = 36, height = 4),
add_plot(volcano_enrichment_pl + guides(color = "none"), x = 129, y = 119, width = 41, height = 39),
width = 170, height = 159, units = "mm", show_grid_lines = FALSE, latex_support = TRUE,
filename = "../plots/glioblastoma_results.pdf")
## gg[gg1]
## Measuring dimensions of: \bfseries{}(B) Integration with LEMUR adjusting for patient and treatment effect
## Running command: '/Library/TeX/texbin/pdflatex' -interaction=batchmode -halt-on-error -output-directory '/var/folders/dc/tppjxs9x6ll378lq88lz1fm40000gq/T//RtmpsDz71d/tikzDevice59f723cb5e87' 'tikzStringWidthCalc.tex'
## gg[gg2]
## gg[gg3]
## gg[gg4]
## gg[gg5]
## gg[gg6]
## gg[gg7]
## gg[gg8]
## gg[gg9]
## gg[gg10]
## gg[gg11]
## gg[gg12]
## gg[gg13]
## gg[gg14]
## gg[gg15]
## gg[gg16]
## gg[gg17]
## gg[gg18]
## gg[gg19]
## gg[gg20]
## gg[gg21]
## gg[gg22]
## gg[gg23]
## gg[gg24]
## Warning: Removed 3 rows containing missing values or values outside the scale
## range (`geom_point()`).
## gg[gg25]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
tab <- colData(fit_small) %>%
as_tibble() %>%
mutate(treatment = factor(treatment, levels = c("vehicle (DMSO)", "0.2 uM panobinostat", "2.5 uM etoposide"))) %>%
group_by(patient_id, treatment, pat_cond, age, gender, location) %>%
summarize(n_cells = n(), .groups = "drop") %>%
dplyr::select(`Patient ID` = patient_id, Age = age, Gender = gender,
`Tumor location` = location, Conditon = treatment, `\\# Cells` = n_cells) %>%
mutate(across(everything(), as.character)) %>%
mutate(across(everything(), \(x) ifelse(x == lag(x, default = ""), "", x)), .by = `Patient ID`) %>%
mutate(`Patient ID` = ifelse(`Patient ID` == lag(`Patient ID`, default = ""), "", `Patient ID`)) %>%
kableExtra::kbl(booktabs = TRUE, format = "latex", escape = FALSE, linesep = "") %>%
kableExtra::kable_styling(latex_options = "striped", stripe_index = c(1:2, 5:6, 9:10))
str_split(tab, "\n")[[1]] %>%
magrittr::extract(., 2:(length(.)-1)) %>%
write_lines("../plots/patient_overview_table.tex")
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lemur_1.1.5 SingleCellExperiment_1.22.0
## [3] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [5] GenomicRanges_1.52.1 GenomeInfoDb_1.36.4
## [7] IRanges_2.34.1 S4Vectors_0.38.2
## [9] BiocGenerics_0.46.0 MatrixGenerics_1.12.3
## [11] matrixStats_1.0.0 glue_1.6.2
## [13] lubridate_1.9.3 forcats_1.0.0
## [15] stringr_1.5.0 dplyr_1.1.3
## [17] purrr_1.0.2 readr_2.1.4
## [19] tidyr_1.3.0 tibble_3.2.1
## [21] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.2 BiocIO_1.10.0
## [3] bitops_1.0-7 ggplotify_0.1.2
## [5] polyclip_1.10-6 XML_3.99-0.14
## [7] lifecycle_1.0.3 vroom_1.6.4
## [9] lattice_0.21-9 MASS_7.3-60
## [11] magrittr_2.0.3 sass_0.4.7
## [13] rmarkdown_2.25 jquerylib_0.1.4
## [15] yaml_2.3.7 glmGamPoi_1.12.2
## [17] plotgardener_1.6.4 cowplot_1.1.3
## [19] DBI_1.1.3 RColorBrewer_1.1-3
## [21] abind_1.4-5 zlibbioc_1.46.0
## [23] rvest_1.0.3 ggraph_2.1.0
## [25] RCurl_1.98-1.12 yulab.utils_0.1.0
## [27] tweenr_2.0.2 GenomeInfoDbData_1.2.10
## [29] enrichplot_1.20.3 ggrepel_0.9.4
## [31] tidytree_0.4.5 svglite_2.1.2
## [33] DelayedMatrixStats_1.22.6 codetools_0.2-19
## [35] DelayedArray_0.26.7 xml2_1.3.5
## [37] DOSE_3.26.2 RApiSerialize_0.1.2
## [39] ggforce_0.4.1 tidyselect_1.2.0
## [41] filehash_2.4-5 aplot_0.2.2
## [43] farver_2.1.1 viridis_0.6.4
## [45] webshot_0.5.5 GenomicAlignments_1.36.0
## [47] jsonlite_1.8.7 tidygraph_1.2.3
## [49] systemfonts_1.0.5 tools_4.3.2
## [51] ggnewscale_0.4.9 treeio_1.27.0
## [53] strawr_0.0.91 Rcpp_1.0.11
## [55] gridExtra_2.3 xfun_0.40
## [57] qvalue_2.32.0 withr_2.5.1
## [59] BiocManager_1.30.22 fastmap_1.1.1
## [61] ggh4x_0.2.6 fansi_1.0.5
## [63] digest_0.6.33 timechange_0.2.0
## [65] R6_2.5.1 gridGraphics_0.5-1
## [67] colorspace_2.1-0 GO.db_3.17.0
## [69] Cairo_1.6-1 RSQLite_2.3.1
## [71] utf8_1.2.3 generics_0.1.3
## [73] renv_1.0.3 data.table_1.14.8
## [75] rtracklayer_1.60.1 graphlayouts_1.0.2
## [77] httr_1.4.7 S4Arrays_1.0.6
## [79] scatterpie_0.2.1 pkgconfig_2.0.3
## [81] gtable_0.3.4 blob_1.2.4
## [83] XVector_0.40.0 shadowtext_0.1.2
## [85] clusterProfiler_4.8.3 htmltools_0.5.6.1
## [87] fgsea_1.26.0 plyranges_1.20.0
## [89] kableExtra_1.3.4 scales_1.3.0
## [91] png_0.1-8 ggfun_0.1.3
## [93] knitr_1.44 rstudioapi_0.15.0
## [95] tzdb_0.4.0 reshape2_1.4.4
## [97] rjson_0.2.21 nlme_3.1-163
## [99] curl_5.1.0 org.Hs.eg.db_3.17.0
## [101] cachem_1.0.8 parallel_4.3.2
## [103] vipor_0.4.5 HDO.db_0.99.1
## [105] AnnotationDbi_1.62.2 ggrastr_1.0.2
## [107] restfulr_0.0.15 pillar_1.9.0
## [109] grid_4.3.2 vctrs_0.6.3
## [111] stringfish_0.15.8 beeswarm_0.4.0
## [113] evaluate_0.22 isoband_0.2.7
## [115] cli_3.6.1 compiler_4.3.2
## [117] Rsamtools_2.16.0 rlang_1.1.1
## [119] crayon_1.5.2 ggsignif_0.6.4
## [121] labeling_0.4.3 plyr_1.8.9
## [123] fs_1.6.3 ggbeeswarm_0.7.2
## [125] stringi_1.7.12 viridisLite_0.4.2
## [127] BiocParallel_1.34.2 munsell_0.5.0
## [129] Biostrings_2.68.1 lazyeval_0.2.2
## [131] GOSemSim_2.26.1 Matrix_1.6-1.1
## [133] hms_1.1.3 patchwork_1.1.3
## [135] sparseMatrixStats_1.12.2 bit64_4.0.5
## [137] KEGGREST_1.40.1 qs_0.25.5
## [139] igraph_1.5.1 memoise_2.0.1
## [141] RcppParallel_5.1.7 bslib_0.5.1
## [143] tikzDevice_0.12.5 ggtree_3.8.2
## [145] fastmatch_1.1-4 bit_4.0.5
## [147] downloader_0.4 gson_0.1.0
## [149] ape_5.7-1